In [4]:
import pandas as pd
In [5]:
df=pd.read_excel(r"D:\pitf.world.20160101-20200229.xlsx\pitf.world.20160101-20200229.xlsx",skiprows=2)
In [6]:
df
Out[6]:
Event Type Campaign Identifier Event Reporting Start Day Start Month Start Year End Day End Month End Year Country ... Link Primary Source Type Primary Source Secondary Source Type Secondary Source Contesting Source Type Contesting Source Citation Comments Coder
0 Incident NaN Eyewitness Account 6 1 2016 99 99 9999 AFG ... NaN BBC International Afghan Islamic Press news agency, Peshawar Local NaN NaN BBCSAP0020160107ec170025t NaN PAS
1 Incident NaN Eyewitness Account 17 1 2016 99 99 9999 AFG ... NaN BBC, AFP, Reuters, NYT International Shamshad TV, Kabul, Afghan Channel One (1TV), ... Local NaN NaN BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... NaN PAS
2 Incident NaN Eyewitness Account 20 1 2016 99 99 9999 AFG ... NaN BBC, AFP, Reuters, AP International Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... Local NaN NaN BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... NaN PAS
3 Incident NaN Eyewitness Account 3 1 2016 99 99 9999 BDI ... NaN BBC International Radio France Internationale, Paris Local NaN NaN BBCAP00020160104ec14000gq NaN PAS
4 Incident NaN Eyewitness Account 15 1 2016 99 99 9999 BFA ... NaN AFP, Reuters, AP, CNN International NaN NaN NaN NaN AFPR000020160115ec1f00bcd, AFPR000020160116ec1... NaN PAS
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7451 Campaign NaN Secondary Account 25 2 2020 99 99 9999 SYR ... NaN BBC, AFP, AP, CNN International BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... Local NaN NaN BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... Only region used for geo-coding. BEB
7452 Incident NaN Eyewitness Account 8 2 2020 99 99 9999 THA ... NaN AFP, CNN, Reuters, AP, NYT International NaN NaN NaN NaN AFPR000020200208eg28006y1, AFPR000020200208eg2... NaN BEB
7453 Incident NaN Secondary Account 2 2 2020 99 99 9999 TZA ... NaN Reuters, BBC, All Africa International Mwananchi, Dar es Salaam, 1 Feb 20 Local NaN NaN LBA0000020200202eg22007k9, BBCMNF0020200202eg2... NaN BEB
7454 Incident NaN Eyewitness Account 15 2 2020 99 99 9999 YEM ... NaN Reuters, BBC, AP, AFP International BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... Local NaN NaN LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... Only district used for geo-coding. BEB
7455 Incident NaN Secondary Account 1 2 2020 99 99 9999 ZAF ... NaN AFP International NaN NaN NaN NaN AFPR000020200201eg21007y5 NaN BEB

7456 rows × 73 columns

In [7]:
df.shape
Out[7]:
(7456, 73)
In [8]:
df.describe
Out[8]:
<bound method NDFrame.describe of      Event Type Campaign Identifier     Event Reporting Start Day  \
0      Incident                 NaN  Eyewitness Account         6   
1      Incident                 NaN  Eyewitness Account        17   
2      Incident                 NaN  Eyewitness Account        20   
3      Incident                 NaN  Eyewitness Account         3   
4      Incident                 NaN  Eyewitness Account        15   
...         ...                 ...                 ...       ...   
7451   Campaign                 NaN   Secondary Account        25   
7452   Incident                 NaN  Eyewitness Account         8   
7453   Incident                 NaN   Secondary Account         2   
7454   Incident                 NaN  Eyewitness Account        15   
7455   Incident                 NaN   Secondary Account         1   

      Start Month  Start Year  End Day  End Month  End Year Country  ... Link  \
0               1        2016       99         99      9999     AFG  ...  NaN   
1               1        2016       99         99      9999     AFG  ...  NaN   
2               1        2016       99         99      9999     AFG  ...  NaN   
3               1        2016       99         99      9999     BDI  ...  NaN   
4               1        2016       99         99      9999     BFA  ...  NaN   
...           ...         ...      ...        ...       ...     ...  ...  ...   
7451            2        2020       99         99      9999     SYR  ...  NaN   
7452            2        2020       99         99      9999     THA  ...  NaN   
7453            2        2020       99         99      9999     TZA  ...  NaN   
7454            2        2020       99         99      9999     YEM  ...  NaN   
7455            2        2020       99         99      9999     ZAF  ...  NaN   

             Primary Source Type Primary Source  \
0                            BBC  International   
1         BBC, AFP, Reuters, NYT  International   
2          BBC, AFP, Reuters, AP  International   
3                            BBC  International   
4          AFP, Reuters, AP, CNN  International   
...                          ...            ...   
7451           BBC, AFP, AP, CNN  International   
7452  AFP, CNN, Reuters, AP, NYT  International   
7453    Reuters, BBC, All Africa  International   
7454       Reuters, BBC, AP, AFP  International   
7455                         AFP  International   

                                  Secondary Source Type  Secondary Source  \
0            Afghan Islamic Press news agency, Peshawar             Local   
1     Shamshad TV, Kabul, Afghan Channel One (1TV), ...             Local   
2     Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih...             Local   
3                    Radio France Internationale, Paris             Local   
4                                                   NaN               NaN   
...                                                 ...               ...   
7451  BBC Monitoring 25 Feb 20, BBC Monitoring in Ar...             Local   
7452                                                NaN               NaN   
7453                 Mwananchi, Dar es Salaam, 1 Feb 20             Local   
7454  BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B...             Local   
7455                                                NaN               NaN   

      Contesting Source Type Contesting Source  \
0                        NaN               NaN   
1                        NaN               NaN   
2                        NaN               NaN   
3                        NaN               NaN   
4                        NaN               NaN   
...                      ...               ...   
7451                     NaN               NaN   
7452                     NaN               NaN   
7453                     NaN               NaN   
7454                     NaN               NaN   
7455                     NaN               NaN   

                                               Citation  \
0                             BBCSAP0020160107ec170025t   
1     BBCMNF0020160117ec1h000rv, AFPR000020160117ec1...   
2     BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1...   
3                             BBCAP00020160104ec14000gq   
4     AFPR000020160115ec1f00bcd, AFPR000020160116ec1...   
...                                                 ...   
7451  BBCMEP0020200225eg2p001h3, AFPR000020200225eg2...   
7452  AFPR000020200208eg28006y1, AFPR000020200208eg2...   
7453  LBA0000020200202eg22007k9, BBCMNF0020200202eg2...   
7454  LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2...   
7455                          AFPR000020200201eg21007y5   

                                Comments  Coder  
0                                    NaN    PAS  
1                                    NaN    PAS  
2                                    NaN    PAS  
3                                    NaN    PAS  
4                                    NaN    PAS  
...                                  ...    ...  
7451    Only region used for geo-coding.    BEB  
7452                                 NaN    BEB  
7453                                 NaN    BEB  
7454  Only district used for geo-coding.    BEB  
7455                                 NaN    BEB  

[7456 rows x 73 columns]>
In [9]:
df = df.rename(columns={
    "Start Day": "start_day",
    "Start Month": "start_month",
    "Start Year": "start_year",
    "Event Type": "event_type",
    "Country": "country",
    "Region": "region",
    "District": "district",
    "Deaths Number": "fatalities",
    "Description": "summary"
})
In [10]:
df
Out[10]:
event_type Campaign Identifier Event Reporting start_day start_month start_year End Day End Month End Year country ... Link Primary Source Type Primary Source Secondary Source Type Secondary Source Contesting Source Type Contesting Source Citation Comments Coder
0 Incident NaN Eyewitness Account 6 1 2016 99 99 9999 AFG ... NaN BBC International Afghan Islamic Press news agency, Peshawar Local NaN NaN BBCSAP0020160107ec170025t NaN PAS
1 Incident NaN Eyewitness Account 17 1 2016 99 99 9999 AFG ... NaN BBC, AFP, Reuters, NYT International Shamshad TV, Kabul, Afghan Channel One (1TV), ... Local NaN NaN BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... NaN PAS
2 Incident NaN Eyewitness Account 20 1 2016 99 99 9999 AFG ... NaN BBC, AFP, Reuters, AP International Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... Local NaN NaN BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... NaN PAS
3 Incident NaN Eyewitness Account 3 1 2016 99 99 9999 BDI ... NaN BBC International Radio France Internationale, Paris Local NaN NaN BBCAP00020160104ec14000gq NaN PAS
4 Incident NaN Eyewitness Account 15 1 2016 99 99 9999 BFA ... NaN AFP, Reuters, AP, CNN International NaN NaN NaN NaN AFPR000020160115ec1f00bcd, AFPR000020160116ec1... NaN PAS
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7451 Campaign NaN Secondary Account 25 2 2020 99 99 9999 SYR ... NaN BBC, AFP, AP, CNN International BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... Local NaN NaN BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... Only region used for geo-coding. BEB
7452 Incident NaN Eyewitness Account 8 2 2020 99 99 9999 THA ... NaN AFP, CNN, Reuters, AP, NYT International NaN NaN NaN NaN AFPR000020200208eg28006y1, AFPR000020200208eg2... NaN BEB
7453 Incident NaN Secondary Account 2 2 2020 99 99 9999 TZA ... NaN Reuters, BBC, All Africa International Mwananchi, Dar es Salaam, 1 Feb 20 Local NaN NaN LBA0000020200202eg22007k9, BBCMNF0020200202eg2... NaN BEB
7454 Incident NaN Eyewitness Account 15 2 2020 99 99 9999 YEM ... NaN Reuters, BBC, AP, AFP International BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... Local NaN NaN LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... Only district used for geo-coding. BEB
7455 Incident NaN Secondary Account 1 2 2020 99 99 9999 ZAF ... NaN AFP International NaN NaN NaN NaN AFPR000020200201eg21007y5 NaN BEB

7456 rows × 73 columns

In [11]:
# Convert DMS (Degrees, Minutes, Seconds) + Direction into Decimal coordinates
def dms_to_decimal(deg, min_, sec, direction):
    try:
        dec = deg + min_ / 60 + sec / 3600
        if direction in ['S', 'W']:
            dec = -dec
        return dec
    except:
        return np.nan
In [12]:
# Apply for latitude and longitude
df['latitude'] = df.apply(lambda row: dms_to_decimal(
    row.get('Degrees'), row.get('Minutes'), row.get('Seconds'), row.get('Direction')), axis=1)

df['longitude'] = df.apply(lambda row: dms_to_decimal(
    row.get('Degrees.1'), row.get('Minutes.1'), row.get('Seconds.1'), row.get('Direction.1')), axis=1)

# Create full event date from day, month, year
df['event_date'] = pd.to_datetime(dict(
    year=df['start_year'], month=df['start_month'], day=df['start_day']), errors='coerce')
In [13]:
df
Out[13]:
event_type Campaign Identifier Event Reporting start_day start_month start_year End Day End Month End Year country ... Secondary Source Type Secondary Source Contesting Source Type Contesting Source Citation Comments Coder latitude longitude event_date
0 Incident NaN Eyewitness Account 6 1 2016 99 99 9999 AFG ... Afghan Islamic Press news agency, Peshawar Local NaN NaN BBCSAP0020160107ec170025t NaN PAS 36.934167 66.113889 2016-01-06
1 Incident NaN Eyewitness Account 17 1 2016 99 99 9999 AFG ... Shamshad TV, Kabul, Afghan Channel One (1TV), ... Local NaN NaN BBCMNF0020160117ec1h000rv, AFPR000020160117ec1... NaN PAS 34.434167 70.447778 2016-01-17
2 Incident NaN Eyewitness Account 20 1 2016 99 99 9999 AFG ... Tolo TV, Kabul, Tolo News, Kabul, Voice of Jih... Local NaN NaN BBCMNF0020160120ec1k003e9, BBCMNF0020160120ec1... NaN PAS 34.533333 69.166667 2016-01-20
3 Incident NaN Eyewitness Account 3 1 2016 99 99 9999 BDI ... Radio France Internationale, Paris Local NaN NaN BBCAP00020160104ec14000gq NaN PAS -3.411944 29.375833 2016-01-03
4 Incident NaN Eyewitness Account 15 1 2016 99 99 9999 BFA ... NaN NaN NaN NaN AFPR000020160115ec1f00bcd, AFPR000020160116ec1... NaN PAS 12.357222 -1.535278 2016-01-15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
7451 Campaign NaN Secondary Account 25 2 2020 99 99 9999 SYR ... BBC Monitoring 25 Feb 20, BBC Monitoring in Ar... Local NaN NaN BBCMEP0020200225eg2p001h3, AFPR000020200225eg2... Only region used for geo-coding. BEB 35.766667 36.666667 2020-02-25
7452 Incident NaN Eyewitness Account 8 2 2020 99 99 9999 THA ... NaN NaN NaN NaN AFPR000020200208eg28006y1, AFPR000020200208eg2... NaN BEB 14.970556 102.101944 2020-02-08
7453 Incident NaN Secondary Account 2 2 2020 99 99 9999 TZA ... Mwananchi, Dar es Salaam, 1 Feb 20 Local NaN NaN LBA0000020200202eg22007k9, BBCMNF0020200202eg2... NaN BEB 3.350000 37.333333 2020-02-02
7454 Incident NaN Eyewitness Account 15 2 2020 99 99 9999 YEM ... BBC Monitoring in Arabic 1156 gmt 15 Feb 20, B... Local NaN NaN LBA0000020200215eg2f00dw1, BBCMNF0020200215eg2... Only district used for geo-coding. BEB 16.095833 44.566667 2020-02-15
7455 Incident NaN Secondary Account 1 2 2020 99 99 9999 ZAF ... NaN NaN NaN NaN AFPR000020200201eg21007y5 NaN BEB -26.202222 28.043611 2020-02-01

7456 rows × 76 columns

In [14]:
# Keep only important columns
df_clean=df[['event_type', 'event_date', 'country', 'region', 'district',
    'fatalities', 'latitude', 'longitude', 'summary']]

# Remove rows without coordinates
df_clean = df_clean.dropna(subset=['latitude', 'longitude'])
Out[14]:
(7455, 9)
In [15]:
# Convert fatalities to numeric
df_clean['fatalities'] = pd.to_numeric(df_clean['fatalities'], errors='coerce')

# Drop rows with missing or zero fatalities
df_clean = df_clean[df_clean['fatalities'] > 0]

# Check the shape
df_clean.shape
Out[15]:
(7455, 9)
In [16]:
# Total events
total_events = df_clean.shape[0]

# Unique countries affected
total_countries = df_clean['country'].nunique()

# Total fatalities
total_fatalities = df_clean['fatalities'].sum()

# Date range
date_min = df_clean['event_date'].min()
date_max = df_clean['event_date'].max()

# Print all
print("Total Atrocity Events:", total_events)
print("Total Countries Affected:", total_countries)
print("Total Civilian Fatalities:", int(total_fatalities))
print("Data Time Range:", date_min.date(), "to", date_max.date())
Total Atrocity Events: 7455
Total Countries Affected: 98
Total Civilian Fatalities: 65954
Data Time Range: 2016-01-01 to 2020-12-31
In [17]:
import matplotlib.pyplot as plt
import seaborn as sns

# Group by country
top_countries = df_clean.groupby('country')['fatalities'].sum().sort_values(ascending=False).head(10)

# Plot
plt.figure(figsize=(10, 6))
sns.barplot(x=top_countries.values, y=top_countries.index, palette='Reds_r')
plt.title("Top 10 Countries by Civilian Fatalities (2016–2020)")
plt.xlabel("Total Fatalities")
plt.ylabel("Country")
plt.tight_layout()
plt.show()
C:\Users\ADMIN\AppData\Local\Temp\ipykernel_2116\4069373457.py:9: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_countries.values, y=top_countries.index, palette='Reds_r')
No description has been provided for this image
In [18]:
# Extract year
df_clean['year'] = df_clean['event_date'].dt.year

# Group by year
yearly_deaths = df_clean.groupby('year')['fatalities'].sum()

# Plot
plt.figure(figsize=(8,5))
sns.lineplot(x=yearly_deaths.index, y=yearly_deaths.values, marker='o', color='darkred')
plt.title("Year-wise Fatalities")
plt.xlabel("Year")
plt.ylabel("Total Fatalities")
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [20]:
pip install folium
Collecting folium
  Downloading folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Requirement already satisfied: jinja2>=2.9 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (3.1.4)
Requirement already satisfied: numpy in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2.2.5)
Requirement already satisfied: requests in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2.32.3)
Requirement already satisfied: xyzservices in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from folium) (2024.9.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from jinja2>=2.9->folium) (3.0.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (3.4.0)
Requirement already satisfied: idna<4,>=2.5 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (3.10)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\admin\appdata\local\programs\python\python313\lib\site-packages (from requests->folium) (2024.8.30)
Downloading folium-0.20.0-py2.py3-none-any.whl (113 kB)
Downloading branca-0.8.1-py3-none-any.whl (26 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.1 folium-0.20.0
Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
In [23]:
import folium
from folium.plugins import MarkerCluster

# Create base map
map_base = folium.Map(
    location=[df_clean['latitude'].median(), df_clean['longitude'].median()],
    zoom_start=2
)

# Create marker cluster
marker_cluster = MarkerCluster().add_to(map_base)

# Use .iterrows() instead of .loc[]
for _, row in df_clean.iterrows():
    try:
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=f"{row['country']} – {int(row['fatalities'])} deaths\n{str(row['summary'])[:100]}",
            icon=folium.Icon(color='red', icon='info-sign')
        ).add_to(marker_cluster)
    except:
        pass  # skip rows with bad data

# Show the map
map_base
Out[23]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [22]:
plt.figure(figsize=(12, 6))
sns.scatterplot(
    x=df_clean['longitude'],
    y=df_clean['latitude'],
    size=df_clean['fatalities'],
    hue=df_clean['fatalities'],
    sizes=(10, 200),
    palette='Reds',
    alpha=0.6,
    legend=False
)

plt.title('Global Map of Atrocities (2016–2020)')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [24]:
map_base.save("atrocity_map.html")
In [ ]: